Code
# Set options
knitr:: opts_chunk$ set (
echo = TRUE ,
warning = FALSE ,
message = FALSE ,
fig.align = 'center' ,
fig.retina = 2
)
rm (list= ls ())
library (tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library (ggplot2)
#library(table1)
library (gt)
library (survival)
library (data.table)
library (randomForest)
library (grf)
library (policytree)
library (DiagrammeR)
#library(grid)
#library(forestploter)
#library(randomizr)
# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)
library (forestsearch)
library (weightedsurv)
# Set theme for plots
theme_set (theme_minimal (base_size = 12 ))
Summary
Reproducing main GBSG analysis
Datasetup
Code
df.analysis <- gbsg
df.analysis <- within (df.analysis,{
id <- as.numeric (c (1 : nrow (df.analysis)))
# time to months
time_months <- rfstime/ 30.4375
grade3 <- ifelse (grade== "3" ,1 ,0 )
treat <- hormon
})
confounders.name <- c ("age" ,"meno" ,"size" ,"grade3" ,"nodes" ,"pgr" ,"er" )
outcome.name <- c ("time_months" )
event.name <- c ("status" )
id.name <- c ("id" )
treat.name <- c ("hormon" )
Kaplan-Meier curves and baseline summary
Code
dfcount <- df_counting (
df = df.analysis,
by.risk = 6 ,
tte.name = outcome.name,
event.name = event.name,
treat.name = treat.name
)
plot_weighted_km (dfcount, conf.int = TRUE , show.logrank = TRUE , ymax = 1.05 , xmed.fraction = 0.775 , ymed.offset = 0.125 )
Code
create_summary_table (data = df.analysis, treat_var = treat.name,
table_title = "GBSG Characteristics by Treatment Arm" ,
vars_continuous= c ("age" ,"nodes" ,"size" ,"er" ,"pgr" ),
vars_categorical= c ("grade" ,"grade3" ),
font_size = 12 )
Characteristic
Control (n=440)
Treatment (n=246)
P-value
SMD
age
Mean (SD)
51.1 (10.0)
56.6 (9.4)
<0.001
0.57
nodes
Mean (SD)
4.9 (5.6)
5.1 (5.3)
0.665
0.03
size
Mean (SD)
29.6 (14.4)
28.8 (14.1)
0.470
0.06
er
Mean (SD)
79.7 (124.2)
125.8 (191.1)
<0.001
0.30
pgr
Mean (SD)
102.0 (170.0)
124.3 (249.7)
0.213
0.11
grade
0.273
0.06
1
48 (10.9%)
33 (13.4%)
2
281 (63.9%)
163 (66.3%)
3
111 (25.2%)
50 (20.3%)
grade3
0.174
0.05
0
329 (74.8%)
196 (79.7%)
1
111 (25.2%)
50 (20.3%)
GRF analysis
Code
## GRF
grf_est1 <- grf.subg.harm.survival (data= df.analysis,
confounders.name = confounders.name,
outcome.name= outcome.name, event.name= event.name, id.name= id.name, treat.name= treat.name,
maxdepth = 2 , n.min = 60 , dmin.grf = 12 , frac.tau= 0.6 , details= TRUE )
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
Code
# NOTE : In general for GRF trees
# leaf1 --> recommend control
# leaf2 --> recommend treatment
# Tree depth 1
plot (grf_est1$ tree1,leaf.labels= c ("Control" ,"Treat" ))
Code
# Tree depth 2
plot (grf_est1$ tree2,leaf.labels= c ("Control" ,"Treat" ))
Forestsearch with depth=2 (maxk = 2)
Code
# Setup parallel processing
library (doFuture)
library (doRNG)
registerDoFuture ()
registerDoRNG ()
system.time ({fs <- forestsearch (df.analysis, confounders.name = confounders.name,
outcome.name = "time_months" , treat.name = "hormon" , event.name = "status" , id.name = "id" ,
potentialOutcome.name = NULL ,
df.test = NULL ,
flag_harm.name = NULL ,
hr.threshold = 1.25 , hr.consistency = 1.0 , pconsistency.threshold = 0.90 ,
sg_focus = "maxSG" , max_subgroups_search = 30 ,
use_twostage = TRUE ,
showten_subgroups = TRUE , details= TRUE ,
conf_force = NULL ,
cut_type = "default" , use_grf = TRUE , plot.grf = TRUE , use_lasso = FALSE ,
maxk = 2 , fs.splits = 1000 ,
n.min = 60 , d0.min = 10 , d1.min = 10 ,
plot.sg = TRUE , by.risk = 6 ,
parallel_args = list (plan= "callr" , workers = 30 , show_message = TRUE )
)
})
=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30
Maximum total splits: 1000
Batch size: 20
================================================
GRF stage for cut selection with dmin, tau = 12 0.6
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
GRF cuts identified: 3
Cuts: er <= 0, age <= 50, age <= 43
# of continuous/categorical characteristics 5 2
Continuous characteristics: age size nodes pgr er
Categorical characteristics: meno grade3
Default cuts included (1st 20) age <= mean(age) age <= median(age) age <= qlow(age) age <= qhigh(age) size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr) er <= mean(er) er <= median(er) er <= qlow(er) er <= qhigh(er)
Categorical: meno grade3
Factors per GRF: er <= 0 age <= 50 age <= 43
Initial GRF cuts included er <= 0 age <= 50 age <= 43
===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 25 cut expressions once and caching...
Cut evaluation summary:
Total cuts: 25
Valid cuts: 25
Errors: 0
✓ All 25 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====
# of candidate subgroup factors= 25
[1] "er <= 0" "age <= 50" "age <= 43" "age <= 53.1" "age <= 53"
[6] "age <= 46" "age <= 61" "size <= 29.3" "size <= 25" "size <= 20"
[11] "size <= 35" "nodes <= 5" "nodes <= 3" "nodes <= 1" "nodes <= 7"
[16] "pgr <= 110" "pgr <= 32.5" "pgr <= 7" "pgr <= 131.8" "er <= 96.3"
[21] "er <= 36" "er <= 8" "er <= 114" "meno" "grade3"
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 1275
Events criteria: control >= 10 , treatment >= 10
Subgroup search completed in 0.03 minutes
Found 25 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 25
Removed 1 near-duplicate subgroups
Original rows: 25
After removal: 24
# of unique initial candidates: 24
# Restricting to top stop_Kgroups = 30
# of candidates to evaluate: 24
Algorithm: Two-stage sequential
Stage 1 splits: 30
Screen threshold: 0.763
Max total splits: 1000
Batch size: 20
Parallel processing: callr with 30 workers
*** Subgroup found: {er <= 0}
% consistency criteria met= 0.95
SG focus= maxSG
Subgroup Consistency Minutes= 0.053
Algorithm used: Two-stage sequential
Candidates evaluated: 24
Candidates passed: 7
Subgroup found (FS) with sg_focus='maxSG'
Selected subgroup: {er <= 0}
Minutes forestsearch overall = 0.09
Consistency algorithm used: twostage
user system elapsed
23.424 1.773 5.109
Code
plan ("sequential" )
# Results for estimation (training) data, which_df = "est" is default
res_tabs <- sg_tables (fs, ndecimals = 3 , which_df = "est" )
res_tabs$ sg10_out
Two-factor subgroups (maxk=2)
{er <= 0}
82
45
16
1.951
0.950
{er <= 0}
{pgr <= 32.5}
75
41
16
2.222
1.000
{er <= 0}
!{age <= 43}
68
38
14
2.164
0.960
{er <= 0}
{pgr <= 7}
64
34
13
1.992
0.920
{er <= 0}
{size <= 35}
61
34
15
2.537
0.970
{er <= 0}
{nodes <= 7}
61
31
11
2.335
0.950
{er <= 0}
!{size <= 20}
61
35
12
2.054
0.930
Search Configuration: Single-factor candidates (L) = 50; Maximum combinations evaluated = 1,275; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 25; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
Training data estimates
ITT
686 (100.0%)
246 (35.9%)
299 (43.6%)
66.3
50.2
7.8
0.69 (0.54, 0.89)
Questionable
82 (12.0%)
26 (31.7%)
45 (54.9%)
22.9
43.7
-14
1.95 (1.04, 3.67)
Recommend
604 (88.0%)
220 (36.4%)
254 (42.1%)
66.7
52.6
9.3
0.61 (0.47, 0.80)
Bootstrap Inference
Code
#output_dir <- "dev/vignettes-working/applications/gbsg/results"
output_dir <- "results/"
save_results <- dir.exists (output_dir)
# File pre-fix for saving
fileout_boot <- c ("gbsg-k2_v4_B=100" )
fileout_cv <- c ("gbsg-k2_v4_CV=20" )
# patchhwork needed for a combined bootstrap plot (otherwise if not avaialable will not produce)
library (patchwork)
# Number of bootstrap samples
NB <- 1000
system.time ({fs_bc <- forestsearch_bootstrap_dofuture (
fs.est = fs,
nb_boots = NB,
show_three = FALSE ,
details = TRUE )
})
Ystar matrix generated should be 'boots x N': 1000 x 686
ForestSearch parameters for bootstrap iterations:
- sg_focus: maxSG
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: FALSE
- use_grf: TRUE
Bootstrap-specific overrides:
- grf_res: NULL (forces re-selection)
- grf_cuts: NULL (forces re-selection)
- parallel_args: sequential (prevents nested parallelism)
- details: FALSE (suppressed in workers)
- plot.sg: FALSE
- plot.grf: FALSE
=== Bootstrap Analysis Complete ===
Success rate: 92.9% (929/1000)
H (Questionable) Estimates:
Unadjusted: 1.95 (1.04,3.67)
Bias-corrected: 1.61 (0.84,3.08)
Hc (Recommend) Estimates:
Unadjusted: 0.61 (0.47,0.8)
Bias-corrected: 0.66 (0.46,0.95)
===================================
user system elapsed
11183.886 206.971 885.608
Code
plan ("sequential" )
if (save_results) {
filename <- file.path (output_dir,
paste0 (fileout_boot,
".RData" ))
save (df.analysis, fs, fs_bc, file = filename)
cat (" \n Results saved to:" , filename, " \n " )
}
Results saved to: results//gbsg-k2_v4_B=100.RData
Diagnostics and Summaries
Code
#load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")
output_dir <- "results/"
load_results <- dir.exists (output_dir)
if (load_results){
filename <- file.path (output_dir,
paste0 (fileout_boot,".RData" ))
load (file = filename)
}
summaries <- summarize_bootstrap_results (
sgharm = fs$ sg.harm,
boot_results = fs_bc,
create_plots = TRUE ,
est.scale = "hr"
)
===============================================================
BOOTSTRAP ANALYSIS SUMMARY
===============================================================
BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
Total iterations: 1000
Successful subgroup ID: 929 (92.9%)
Failed to find subgroup: 71 (7.1%)
TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
Total bootstrap time: 14.73 minutes (0.25 hours)
Average per iteration: 0.01 min (0.9 sec)
Per-iteration timing:
Mean: 0.19 min (11.5 sec)
Median: 0.19 min (11.3 sec)
Std Dev: 0.08 minutes
Range: [0.04, 0.47] minutes
IQR: [0.13, 0.24] minutes
ForestSearch timing (successful iterations only):
Iterations with FS: 1000 (100.0%)
Mean FS time: 0.19 min (11.5 sec)
Median FS time: 0.19 min (11.2 sec)
Total FS time: 191.92 minutes
FS time % of total: 1302.8%
Overhead timing (Cox models, bias correction, etc.):
Mean overhead: 0.00 min (0.0 sec)
Median overhead: 0.00 min (0.0 sec)
Total overhead: 0.21 minutes
Overhead % of total: 1.4%
PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
Performance rating: ✓✓✓ Excellent
Average iteration speed: 0.9 seconds
===============================================================
Code
sg_tab <- summaries$ table
sg_tab
Bootstrap bias-corrected estimates (1000 iterations)
N
NT
Events
MedT
MedC
RMSTd
HR (95% CI)†
HR‡ (95% CI)
Qstnbl
82 (12.0%)
26 (31.7%)
45 (54.9%)
22.9
43.7
-14
1.95 (1.04, 3.67)
1.61 (0.84,3.08)
Recmnd
604 (88.0%)
220 (36.4%)
254 (42.1%)
66.7
52.6
9.3
0.61 (0.47, 0.80)
0.66 (0.46,0.95)
Note : Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 92.9% of bootstrap samples.
Code
event_summary <- summarize_bootstrap_events (fs_bc, threshold = 12 )
=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 1000
Event threshold: <12 events
ORIGINAL Subgroup H on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
ORIGINAL Subgroup Hc on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
NEW Subgroups found: 929 (92.9%)
NEW Subgroup H* on ORIGINAL data:
Control arm <12 events: 9 (1.0% of successful)
Treatment arm <12 events: 19 (2.0% of successful)
Either arm <12 events: 27 (2.9% of successful)
NEW Subgroup Hc* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 2 (0.2% of successful)
Either arm <12 events: 2 (0.2% of successful)
Code
summaries$ diagnostics_table_gt
Analysis of 1000 bootstrap iterations
Success Rate
Total iterations
1000
Successful subgroup ID
929 (92.9%)
Failed to find subgroup
71 (7.1%)
Success rating
Excellent ✓✓✓
Subgroup H (Questionable)
Unadjusted estimate
1.95 (1.04, 3.67)
Bias-corrected estimate
1.61 (0.84, 3.08)
Bias correction impact
17.7%
CI width change
2.64 -> 2.24
Subgroup Hc (Recommend)
Unadjusted estimate
0.61 (0.47, 0.80)
Bias-corrected estimate
0.66 (0.46, 0.95)
Bias correction impact
7.2%
CI width change
0.33 -> 0.49
Bootstrap Quality: H
Valid iterations
929
Mean (SD)
0.47 (0.39)
Coefficient of variation
83.1%
Skewness
-0.01
Bootstrap Quality: Hc
Valid iterations
929
Mean (SD)
-0.42 (0.22)
Coefficient of variation
52.7%
Skewness
0.18
Search Performance
Mean max HR found
3.24 (1.26)
Mean factors evaluated
47.8
Mean combinations tried
1173
Proportion at maxk
--
Interpretation Guide:
✓ Excellent stability : Subgroup is consistently identified across bootstrap samples.
⚠ High variability : Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.
Code
summaries$ subgroup_summary$ original_agreement
Metric Value
<char> <char>
1: Total bootstrap iterations 1000
2: Successful iterations 929
3: Failed iterations (no subgroup) 71
4: Exact match with original 38 (4.1%)
5: Different from original 891 (95.9%)
Code
summaries$ subgroup_summary$ factor_presence
Rank Factor Count Percent
6 1 pgr 517 55.651238
7 2 size 383 41.227126
1 3 age 365 39.289559
2 4 er 305 32.831001
5 5 nodes 101 10.871905
4 6 meno 61 6.566200
3 7 grade3 45 4.843918
Code
summaries$ subgroup_summary$ factor_presence_specific
Rank Base_Factor Factor_Definition Count Percent
182 1 er {er <= 8} 95 10.22605
94 2 size !{size <= 25} 118 12.70183
Forest Search n-fold cross-validation
Code
output_dir <- "results/"
load_results <- dir.exists (output_dir)
if (load_results){
filename <- file.path (output_dir,
paste0 (fileout_boot,".RData" ))
load (file = filename)
}
# Kfolds = n (default to n-fold cross-validations)
fs_OOB <- NULL
fs_OOB <- forestsearch_Kfold (fs.est = fs, details = TRUE ,
parallel_args = list (plan = "callr" , workers = 36 , show_message = TRUE ))
Cross-validation setup:
- Observations: 686
- Folds: 686
- Fold sizes (range): 1-1
ForestSearch parameters for CV folds:
- sg_focus: maxSG
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: FALSE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Cross-validation complete:
- Time: 8.66 minutes
- Subgroup found in 98.5 % of folds
Any found: 0.9854227
Exact match: 0.9620991
At least 1 match: 0.9620991
Cov 1 any: 0.9825073
Cov 2 any: NaN
Cov 1 and 2 any: 0
Cov 1 exact: 0.9620991
Cov 2 exact: NaN
Agreement (sens, ppv) in H and Hc: 0.8292683 0.9917219 0.9315068 0.9771615
Code
# Reset workers to single
plan (sequential)
summary_OOB <- forestsearch_KfoldOut (res= fs_OOB, details= TRUE , outall= TRUE )
Any found: 0.9854227
Exact match: 0.9620991
At least 1 match: 0.9620991
Cov 1 any: 0.9825073
Cov 2 any: NaN
Cov 1 and 2 any: 0
Cov 1 exact: 0.9620991
Cov 2 exact: NaN
Agreement (sens, ppv) in H and Hc: 0.8292683 0.9917219 0.9315068 0.9771615
Subgroup n n1 m1 m0 RMST
Overall "ITT" "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8"
FA_0 "Not recommend" "82 (12.0%)" "26 (31.7%)" "22.9" "43.7" "-14"
KfA_0 "Not recommend" "73 (10.6%)" "21 (28.8%)" NA "42.9" "11"
FA_1 "Recommend" "604 (88.0%)" "220 (36.4%)" "66.7" "52.6" "9.3"
KfA_1 "Recommend" "613 (89.4%)" "225 (36.7%)" "66.2" "55" "6.4"
Hazard ratio
Overall "0.69 (0.54, 0.89)"
FA_0 "1.95 (1.04, 3.67)"
KfA_0 "0.53 (0.22, 1.25)"
FA_1 "0.61 (0.47, 0.80)"
KfA_1 "0.72 (0.56, 0.93)"
Code
table (summary_OOB$ SGs_found[,1 ])
!{age <= 43} !{size <= 25} !{size <= 29.3} {er <= 0} {er <= 8}
2 4 4 660 6
Code
table (summary_OOB$ SGs_found[,2 ])
!{meno} !{size <= 25} !{size <= 29.3} {age <= 47} {er <= 8}
3 1 2 2 8
{pgr <= 33}
1
Code
Ksims <- 200
fs_ten <- forestsearch_tenfold (fs.est = fs, sims = Ksims, Kfolds = 10 , details = TRUE ,
parallel_args = list (plan = "callr" , workers = 36 , show_message = TRUE ))
Starting repeated K-fold cross-validation:
- Simulations: 200
- Folds per simulation: 10
- Workers: 13
ForestSearch parameters for CV folds:
- sg_focus: maxSG
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: FALSE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Repeated K-fold CV complete:
- Time: 20.1 minutes
- Successful simulations: 200 / 200
- Projected hours per 100 sims: 0.17
Code
# Reset workers to single
plan (sequential)
print (fs_ten$ find_summary)
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact
0.8 0.2 0.2 0.5 NA 0.0 0.2
Cov2 exact
NA
Code
print (fs_ten$ sens_summary)
sens_H sens_Hc ppv_H ppv_Hc
0.4024390 0.9304636 0.4401670 0.9202306
Code
print (head (fs_ten$ sens_out))
sens_H sens_Hc ppv_H ppv_Hc
[1,] 0.3780488 0.9519868 0.5166667 0.9185304
[2,] 0.4024390 0.8956954 0.3437500 0.9169492
[3,] 0.3170732 0.9221854 0.3561644 0.9086460
[4,] 0.5121951 0.9337748 0.5121951 0.9337748
[5,] 0.2926829 0.9536424 0.4615385 0.9085174
[6,] 0.2804878 0.9221854 0.3285714 0.9042208
Code
print (head (fs_ten$ find_out))
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.7 0.3 0.3 0.4 NaN 0 0.3 NaN
[2,] 0.8 0.2 0.2 0.3 NaN 0 0.2 NaN
[3,] 0.8 0.1 0.1 0.5 NaN 0 0.1 NaN
[4,] 0.9 0.2 0.2 0.7 NaN 0 0.2 NaN
[5,] 0.5 0.2 0.2 0.4 NaN 0 0.2 NaN
[6,] 0.7 0.1 0.1 0.3 NaN 0 0.1 NaN
Code
# Save all results
output_dir <- "results/"
save_results <- dir.exists (output_dir)
if (save_results) {
filename <- file.path (output_dir,
paste0 (fileout_cv,
".RData" ))
save (df.analysis, fs, fs_bc, fs_ten, fs_OOB, file = filename)
cat (" \n Results saved to:" , filename, " \n " )
}
Results saved to: results//gbsg-k2_v4_CV=20.RData
Code
output_dir <- "results/"
load_results <- dir.exists (output_dir)
if (load_results){
filename <- file.path (output_dir,
paste0 (fileout_cv,".RData" ))
load (file = filename)
}
#' # Define subgroups to display
subgroups <- list (
age_gt65 = list (
subset_expr = "age > 65" ,
name = "age > 65" ,
type = "reference"
),
age_lt65 = list (
subset_expr = "age <= 65" ,
name = "age <= 65" ,
type = "reference"
),
pgr_positive = list (
subset_expr = "pgr > 0" ,
name = "pgr > 0" ,
type = "reference"
),
pgr_negative = list (
subset_expr = "pgr <= 0" ,
name = "pgr <= 0" ,
type = "reference"
)
)
# Create the forest plot
result <- plot_subgroup_results_forestplot (
fs_results = list (fs.est = fs, fs_bc = fs_bc, fs_OOB = fs_OOB, fs_kfold = fs_ten),
df_analysis = df.analysis,
subgroup_list = subgroups,
outcome.name = "time_months" ,
event.name = "status" ,
treat.name = "hormon" ,
E.name = "Hormon" ,
C.name = "CT" ,
ci_column_spaces = 25
)
# Display the plot
plot (result$ plot)